In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [2]:
run=pd.read_csv('plant_vase2.CSV')
In [3]:
run
Out[3]:
year month day hour minute second moisture0 moisture1 moisture2 moisture3 moisture4 irrgation
0 2020 3 11 14 44 39 0.59 0.63 0.51 0.45 0.01 False
1 2020 3 11 14 47 9 0.56 0.60 0.52 0.48 0.01 False
2 2020 3 11 14 49 39 0.56 0.54 0.54 0.51 0.01 False
3 2020 3 11 14 52 9 0.56 0.50 0.57 0.51 0.01 False
4 2020 3 11 14 54 39 0.57 0.53 0.58 0.51 0.01 False
... ... ... ... ... ... ... ... ... ... ... ... ...
10284 2020 3 29 11 53 33 0.03 0.96 0.93 0.99 0.01 False
10285 2020 3 29 11 56 3 0.03 0.96 0.90 0.99 0.01 False
10286 2020 3 29 11 58 33 0.03 0.96 0.91 0.99 0.01 False
10287 2020 3 29 12 1 3 0.03 0.96 0.89 0.99 0.01 False
10288 2020 3 29 12 3 33 0.03 0.96 0.89 0.99 0.01 False

10289 rows × 12 columns

In [4]:
run.drop(columns=['irrgation'])
Out[4]:
year month day hour minute second moisture0 moisture1 moisture2 moisture3 moisture4
0 2020 3 11 14 44 39 0.59 0.63 0.51 0.45 0.01
1 2020 3 11 14 47 9 0.56 0.60 0.52 0.48 0.01
2 2020 3 11 14 49 39 0.56 0.54 0.54 0.51 0.01
3 2020 3 11 14 52 9 0.56 0.50 0.57 0.51 0.01
4 2020 3 11 14 54 39 0.57 0.53 0.58 0.51 0.01
... ... ... ... ... ... ... ... ... ... ... ...
10284 2020 3 29 11 53 33 0.03 0.96 0.93 0.99 0.01
10285 2020 3 29 11 56 3 0.03 0.96 0.90 0.99 0.01
10286 2020 3 29 11 58 33 0.03 0.96 0.91 0.99 0.01
10287 2020 3 29 12 1 3 0.03 0.96 0.89 0.99 0.01
10288 2020 3 29 12 3 33 0.03 0.96 0.89 0.99 0.01

10289 rows × 11 columns

In [5]:
run.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10289 entries, 0 to 10288
Data columns (total 12 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   year       10289 non-null  int64  
 1   month      10289 non-null  int64  
 2   day        10289 non-null  int64  
 3   hour       10289 non-null  int64  
 4   minute     10289 non-null  int64  
 5   second     10289 non-null  int64  
 6   moisture0  10289 non-null  float64
 7   moisture1  10289 non-null  float64
 8   moisture2  10289 non-null  float64
 9   moisture3  10289 non-null  float64
 10  moisture4  10289 non-null  float64
 11  irrgation  10289 non-null  bool   
dtypes: bool(1), float64(5), int64(6)
memory usage: 894.4 KB
In [6]:
run.dtypes
Out[6]:
year           int64
month          int64
day            int64
hour           int64
minute         int64
second         int64
moisture0    float64
moisture1    float64
moisture2    float64
moisture3    float64
moisture4    float64
irrgation       bool
dtype: object
In [7]:
print(list(run))
['year', 'month', 'day', 'hour', 'minute', 'second', 'moisture0', 'moisture1', 'moisture2', 'moisture3', 'moisture4', 'irrgation']
In [8]:
sns.lmplot(x="day", y="moisture4", hue="moisture4",palette="rocket", data=run)
plt.show()
sns.lmplot(x="hour", y="moisture4", hue="moisture4",palette="icefire", data=run)
plt.show()
sns.lmplot(x="minute", y="moisture4", hue="moisture4",palette="coolwarm",data=run)
plt.show()
sns.lmplot(x="hour", y="moisture4", hue="moisture4",palette="cubehelix",data=run)
plt.show()

plt.show()
In [9]:
sns.lmplot(x="moisture0", y="moisture4", hue="moisture4",palette="YlOrBr", data=run)
plt.show()
sns.lmplot(x="moisture1", y="moisture4", hue="moisture4",palette="Blues", data=run)
plt.show()
sns.lmplot(x="moisture2", y="moisture4", hue="moisture4",palette="viridis", data=run)
plt.show()
sns.lmplot(x="moisture3", y="moisture4", hue="moisture4",palette="rocket_r", data=run)
plt.show()

plt.show()
In [10]:
run[run.columns[1:]].corr()['moisture3'][:]
Out[10]:
month             NaN
day          0.759488
hour        -0.038817
minute      -0.000235
second      -0.005488
moisture0   -0.600088
moisture1    0.274376
moisture2    0.648850
moisture3    1.000000
moisture4    0.313640
irrgation         NaN
Name: moisture3, dtype: float64
In [11]:
corr = run.corr()
corr.style.background_gradient(cmap='coolwarm')
C:\Users\DELL\anaconda3\lib\site-packages\pandas\io\formats\style.py:2813: RuntimeWarning: All-NaN slice encountered
  smin = np.nanmin(gmap) if vmin is None else vmin
C:\Users\DELL\anaconda3\lib\site-packages\pandas\io\formats\style.py:2814: RuntimeWarning: All-NaN slice encountered
  smax = np.nanmax(gmap) if vmax is None else vmax
Out[11]:
  year month day hour minute second moisture0 moisture1 moisture2 moisture3 moisture4 irrgation
year nan nan nan nan nan nan nan nan nan nan nan nan
month nan nan nan nan nan nan nan nan nan nan nan nan
day nan nan 1.000000 -0.081286 -0.002565 -0.003688 -0.901576 0.446852 0.688953 0.759488 0.209223 nan
hour nan nan -0.081286 1.000000 0.002132 -0.033278 0.008766 -0.032911 -0.061425 -0.038817 0.038641 nan
minute nan nan -0.002565 0.002132 1.000000 -0.001658 0.000744 0.000057 -0.000239 -0.000235 -0.015091 nan
second nan nan -0.003688 -0.033278 -0.001658 1.000000 0.006799 -0.004114 -0.006818 -0.005488 -0.006834 nan
moisture0 nan nan -0.901576 0.008766 0.000744 0.006799 1.000000 -0.223934 -0.546708 -0.600088 -0.060366 nan
moisture1 nan nan 0.446852 -0.032911 0.000057 -0.004114 -0.223934 1.000000 0.644527 0.274376 0.195880 nan
moisture2 nan nan 0.688953 -0.061425 -0.000239 -0.006818 -0.546708 0.644527 1.000000 0.648850 0.144897 nan
moisture3 nan nan 0.759488 -0.038817 -0.000235 -0.005488 -0.600088 0.274376 0.648850 1.000000 0.313640 nan
moisture4 nan nan 0.209223 0.038641 -0.015091 -0.006834 -0.060366 0.195880 0.144897 0.313640 1.000000 nan
irrgation nan nan nan nan nan nan nan nan nan nan nan nan
In [12]:
duplicate_rows_run = run[run.duplicated()]
In [13]:
run.head()
Out[13]:
year month day hour minute second moisture0 moisture1 moisture2 moisture3 moisture4 irrgation
0 2020 3 11 14 44 39 0.59 0.63 0.51 0.45 0.01 False
1 2020 3 11 14 47 9 0.56 0.60 0.52 0.48 0.01 False
2 2020 3 11 14 49 39 0.56 0.54 0.54 0.51 0.01 False
3 2020 3 11 14 52 9 0.56 0.50 0.57 0.51 0.01 False
4 2020 3 11 14 54 39 0.57 0.53 0.58 0.51 0.01 False
In [14]:
run[['moisture0', 'moisture1','moisture2', 'moisture3','moisture4']].plot()
Out[14]:
<AxesSubplot:>
In [15]:
run.drop(['irrgation','year','month'],axis=1,inplace=True)
In [16]:
run.isnull().sum()
Out[16]:
day          0
hour         0
minute       0
second       0
moisture0    0
moisture1    0
moisture2    0
moisture3    0
moisture4    0
dtype: int64
In [17]:
X = run.drop('moisture4',axis=1)
In [18]:
X.head()
Out[18]:
day hour minute second moisture0 moisture1 moisture2 moisture3
0 11 14 44 39 0.59 0.63 0.51 0.45
1 11 14 47 9 0.56 0.60 0.52 0.48
2 11 14 49 39 0.56 0.54 0.54 0.51
3 11 14 52 9 0.56 0.50 0.57 0.51
4 11 14 54 39 0.57 0.53 0.58 0.51
In [19]:
y = run['moisture4']
y.head()
Out[19]:
0    0.01
1    0.01
2    0.01
3    0.01
4    0.01
Name: moisture4, dtype: float64
In [20]:
from sklearn.preprocessing import StandardScaler
from sklearn import metrics

sc = StandardScaler()
X = sc.fit_transform(X)

X
Out[20]:
array([[-1.74916072,  0.36177781,  0.83927869, ..., -0.47847994,
        -3.19532076, -2.72890505],
       [-1.74916072,  0.36177781,  1.01241716, ..., -0.6145305 ,
        -3.09231758, -2.52112266],
       [-1.74916072,  0.36177781,  1.1278428 , ..., -0.88663161,
        -2.88631124, -2.31334026],
       ...,
       [ 1.72581932, -0.07036474,  1.64725818, ...,  1.01807619,
         0.9248061 ,  1.01117803],
       [ 1.72581932,  0.07368278, -1.64237259, ...,  1.01807619,
         0.71879976,  1.01117803],
       [ 1.72581932,  0.07368278, -1.52694695, ...,  1.01807619,
         0.71879976,  1.01117803]])
In [21]:
from sklearn import metrics
X.shape
Out[21]:
(10289, 8)
In [22]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=42)
In [23]:
y_test
Out[23]:
1825    0.01
7046    0.01
6186    0.00
2025    0.01
5414    0.00
        ... 
3945    0.01
164     0.01
6586    0.01
2715    0.01
9179    0.01
Name: moisture4, Length: 3087, dtype: float64
In [24]:
def predict(algorithm):
    model = algorithm.fit(X_train,y_train)
    print('Training Score: {}'.format(model.score(X_train,y_train)))
    print('Test Accuracy: {}'.format(model.score(X_test, y_test)))

    preds = model.predict(X_test)
    print('Predictions are: {}'.format(preds))
    print('\n')

    r2_score = metrics.r2_score(y_test,preds)
    print('r2_score is:{}'.format(r2_score))
   
    print('MAE:',metrics.mean_absolute_error(y_test,preds))
    print('MSE:',metrics.mean_squared_error(y_test,preds))
    print('RMSE:',np.sqrt(metrics.mean_squared_error(y_test,preds)))
    sns.distplot(y_test-preds,color='red')
In [25]:
from sklearn.metrics import accuracy_score as score
In [26]:
from sklearn.linear_model import LinearRegression
In [27]:
predict(LinearRegression())
Training Score: 0.1799268532108098
Test Accuracy: 0.1914286131772025
Predictions are: [0.0081312  0.00483921 0.00675845 ... 0.0061497  0.00912347 0.01056536]


r2_score is:0.1914286131772025
MAE: 0.0029209458009128264
MSE: 1.4439294462881086e-05
RMSE: 0.003799907165034573
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)
In [28]:
ln_model = LinearRegression()
ln_model.fit(X_train, y_train)
preds1 = ln_model.predict(X_test)
preds1
Out[28]:
array([0.0081312 , 0.00483921, 0.00675845, ..., 0.0061497 , 0.00912347,
       0.01056536])
In [29]:
import plotly.express as px

fig = px.scatter(x=y_test, y=preds1, labels={'x': 'True Moisture', 'y': 'Predicted Moisture'})
fig.add_shape(
    type="line", line=dict(dash='dash'),
    x0=y.min(), y0=y.min(),
    x1=y.max(), y1=y.max()
)
fig.show()
In [30]:
from sklearn.ensemble import RandomForestRegressor
predict(RandomForestRegressor())
Training Score: 0.9047847287002861
Test Accuracy: 0.3379567088972071
Predictions are: [0.0064 0.01   0.009  ... 0.009  0.01   0.01  ]


r2_score is:0.3379567088972071
MAE: 0.002115743440233237
MSE: 1.1822627146096536e-05
RMSE: 0.0034384047385519548
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning:

`distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).

In [31]:
rf = RandomForestRegressor()
rf.fit(X_train, y_train)
preds2 = rf.predict(X_test)
preds2
Out[31]:
array([0.0056, 0.0097, 0.0086, ..., 0.0092, 0.01  , 0.01  ])
In [32]:
import plotly.express as px

fig = px.scatter(x=y_test, y=preds2, labels={'x': 'True Moisture', 'y': 'Predicted Moisture'})
fig.add_shape(
    type="line", line=dict(dash='dash'),
    x0=y.min(), y0=y.min(),
    x1=y.max(), y1=y.max()
)
fig.show()
In [33]:
from sklearn.neighbors import KNeighborsRegressor

predict(KNeighborsRegressor())
Training Score: 0.48203501320363396
Test Accuracy: 0.2580765908436512
Predictions are: [0.006 0.01  0.008 ... 0.01  0.01  0.01 ]


r2_score is:0.2580765908436512
MAE: 0.0022073210236475543
MSE: 1.3249109167476514e-05
RMSE: 0.0036399325773256454
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning:

`distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).

In [34]:
knn = KNeighborsRegressor()
knn.fit(X_train, y_train)
preds3 = knn.predict(X_test)
preds3
Out[34]:
array([0.006, 0.01 , 0.008, ..., 0.01 , 0.01 , 0.01 ])
In [35]:
import plotly.express as px

fig = px.scatter(x=y_test, y=preds3, labels={'x': 'True Moisture', 'y': 'Predicted Moisture'})
fig.add_shape(
    type="line", line=dict(dash='dash'),
    x0=y.min(), y0=y.min(),
    x1=y.max(), y1=y.max()
)
fig.show()
In [36]:
from sklearn.tree import DecisionTreeRegressor 

predict(DecisionTreeRegressor())
Training Score: 1.0
Test Accuracy: -0.12286207889432754
Predictions are: [0.   0.01 0.01 ... 0.01 0.01 0.01]


r2_score is:-0.12286207889432754
MAE: 0.0020051830255911913
MSE: 2.005183025591189e-05
RMSE: 0.0044779270043081194
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning:

`distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).

In [37]:
dt = DecisionTreeRegressor()
dt.fit(X_train, y_train)
preds4 = dt.predict(X_test)
preds4
Out[37]:
array([0.01, 0.01, 0.01, ..., 0.01, 0.01, 0.01])
In [38]:
import plotly.express as px

fig = px.scatter(x=y_test, y=preds4, labels={'x': 'True Moisture', 'y': 'Predicted Moisture'})
fig.add_shape(
    type="line", line=dict(dash='dash'),
    x0=y.min(), y0=y.min(),
    x1=y.max(), y1=y.max()
)
fig.show()
In [39]:
from xgboost.sklearn import XGBRegressor

predict( XGBRegressor())
Training Score: 0.714357284952877
Test Accuracy: 0.3272909589754486
Predictions are: [0.00922653 0.01143963 0.00779632 ... 0.00851323 0.00929431 0.00961161]


r2_score is:0.3272909589754486
MAE: 0.0021950082869981423
MSE: 1.201309382139267e-05
RMSE: 0.003465991030195068
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning:

`distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).

In [40]:
xgb = XGBRegressor()
xgb.fit(X_train, y_train)
preds5 = xgb.predict(X_test)
preds5
Out[40]:
array([0.00922653, 0.01143963, 0.00779632, ..., 0.00851323, 0.00929431,
       0.00961161], dtype=float32)
In [41]:
import plotly.express as px

fig = px.scatter(x=y_test, y=preds5, labels={'x': 'True Moisture', 'y': 'Predicted Moisture'})
fig.add_shape(
    type="line", line=dict(dash='dash'),
    x0=y.min(), y0=y.min(),
    x1=y.max(), y1=y.max()
)
fig.show()
In [42]:
from sklearn.tree import DecisionTreeRegressor 

predict(DecisionTreeRegressor())
Training Score: 1.0
Test Accuracy: -0.15188597754103061
Predictions are: [0.   0.01 0.01 ... 0.01 0.01 0.01]


r2_score is:-0.15188597754103061
MAE: 0.0020570132815030803
MSE: 2.0570132815030774e-05
RMSE: 0.0045354308301451115
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning:

`distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).

In [43]:
dt = KNeighborsRegressor()
dt.fit(X_train, y_train)
preds4 = dt.predict(X_test)
preds4
Out[43]:
array([0.006, 0.01 , 0.008, ..., 0.01 , 0.01 , 0.01 ])
In [44]:
import plotly.express as px

fig = px.scatter(x=y_test, y=preds4, labels={'x': 'True Moisture', 'y': 'Predicted Moisture'})
fig.add_shape(
    type="line", line=dict(dash='dash'),
    x0=y.min(), y0=y.min(),
    x1=y.max(), y1=y.max()
)
fig.show()
In [ ]: